{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "-028b2NqE02R",
   "metadata": {
    "executionInfo": {
     "elapsed": 3,
     "status": "ok",
     "timestamp": 1693291269574,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "-028b2NqE02R"
   },
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "import keras\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import os\n",
    "from keras.callbacks import LearningRateScheduler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a034672d",
   "metadata": {
    "executionInfo": {
     "elapsed": 1329,
     "status": "ok",
     "timestamp": 1693291338036,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "a034672d"
   },
   "outputs": [],
   "source": [
    "from keras.layers import Conv1D, Dense, Dropout, Flatten, MaxPooling1D\n",
    "from tensorflow.keras.layers import Input\n",
    "from tensorflow.keras.models import Model\n",
    "from keras.regularizers import l2\n",
    "from scipy.interpolate import splev, splrep\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "591596f3",
   "metadata": {
    "executionInfo": {
     "elapsed": 2,
     "status": "ok",
     "timestamp": 1693291339297,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "591596f3"
   },
   "outputs": [],
   "source": [
    "base_dir = \"dataset\"\n",
    "\n",
    "ir = 3 # interpolate interval\n",
    "before = 2\n",
    "after = 2\n",
    "\n",
    "# normalize\n",
    "scaler = lambda arr: (arr - np.min(arr)) / (np.max(arr) - np.min(arr))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "0cef782d",
   "metadata": {
    "executionInfo": {
     "elapsed": 2,
     "status": "ok",
     "timestamp": 1693291341601,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "0cef782d"
   },
   "outputs": [],
   "source": [
    "from scipy.interpolate import CubicSpline\n",
    "def interpolate_numpy_array(arr, desired_length):\n",
    "    cs = CubicSpline(np.linspace(0, 1, len(arr)), arr)\n",
    "    x_new = np.linspace(0, 1, desired_length)\n",
    "    interpolated_arr = cs(x_new)\n",
    "    return interpolated_arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "KXj-pyPoXKm3",
   "metadata": {
    "executionInfo": {
     "elapsed": 3,
     "status": "ok",
     "timestamp": 1693291395991,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "KXj-pyPoXKm3"
   },
   "outputs": [],
   "source": [
    "def load_data():\n",
    "    tm = np.arange(0, (before + 1 + after) * 60, step=1 / float(ir))\n",
    "\"\"\"\n",
    "We can change the file pkl for each case: T1,T2,T3,T4 in here.\n",
    "\"\"\"\n",
    "    with open(os.path.join(base_dir, \"T_1.pkl\"), 'rb') as f: # read preprocessing result\n",
    "        apnea_ecg = pickle.load(f)\n",
    "    x,x_train,x_val = [],[],[]\n",
    "    o_train, y_train = apnea_ecg[\"o_train\"], apnea_ecg[\"y_train\"]\n",
    "    groups_train = apnea_ecg[\"groups_train\"]\n",
    "\n",
    "    for i in range(len(o_train)):\n",
    "        min_distance_list, max_distance_list, mean_distance_list = o_train[i]\n",
    "\t\t# Curve interpolation\n",
    "        min_distance_list_inter = interpolate_numpy_array(min_distance_list,900)\n",
    "        max_distance_list_inter = interpolate_numpy_array(max_distance_list,900)\n",
    "        mean_distance_list_inter = interpolate_numpy_array(mean_distance_list,900)\n",
    "\"\"\"\n",
    "In this part we design the ablation to test the relationship of each terms: MinDP,MaxDP, MeanDP. By add this value in\n",
    "the list x we can represent all the case in the paper including: M1,M2,M3,M4,M5,M6,M7.\n",
    "MinDP: min_distance_list_inter\n",
    "MaxDP: max_distance_list_inter\n",
    "MeanDP: mean_distance_list_inter\n",
    "\"\"\"\n",
    "        x.append([min_distance_list_inter,max_distance_list_inter])\n",
    "    groups_training,groups_val=[],[]\n",
    "\n",
    "    num=[i for i in range(16707)]\n",
    "    trainlist, vallist,y_train, y_val = train_test_split(num,y_train, test_size=0.3,random_state=42,stratify =y_train)\n",
    "    print()\n",
    "    for i in trainlist:\n",
    "        x_train.append(x[i])\n",
    "        groups_training.append(groups_train[i])\n",
    "    for i in vallist:\n",
    "        x_val.append(x[i])\n",
    "        groups_val.append(groups_train[i])\n",
    "\n",
    "    x_train = np.array(x_train, dtype=\"float32\").transpose((0, 2, 1)) # convert to numpy format\n",
    "    y_train= np.array(y_train, dtype=\"float32\")\n",
    "    x_val = np.array(x_val, dtype=\"float32\").transpose((0, 2, 1)) # convert to numpy format\n",
    "    y_val = np.array(y_val, dtype=\"float32\")\n",
    "\n",
    "    x_test = []\n",
    "    o_test, y_test = apnea_ecg[\"o_test\"], apnea_ecg[\"y_test\"]\n",
    "    groups_test = apnea_ecg[\"groups_test\"]\n",
    "    for i in range(len(o_test)):\n",
    "        min_distance_list, max_distance_list, mean_distance_list = o_test[i]\n",
    "\t\t# Curve interpolation\n",
    "        min_distance_list_inter = interpolate_numpy_array(min_distance_list,900)\n",
    "        max_distance_list_inter = interpolate_numpy_array(max_distance_list,900)\n",
    "        mean_distance_list_inter = interpolate_numpy_array(mean_distance_list,900)\n",
    "        x_test.append([min_distance_list_inter,max_distance_list_inter])\n",
    "    x_test = np.array(x_test, dtype=\"float32\").transpose((0, 2, 1))\n",
    "    y_test = np.array(y_test, dtype=\"float32\")\n",
    "\n",
    "    return x_train,y_train, groups_training,x_val, y_val, groups_val, x_test, y_test, groups_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "GnDFx0fGCy1m",
   "metadata": {
    "executionInfo": {
     "elapsed": 5,
     "status": "ok",
     "timestamp": 1693291347748,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "GnDFx0fGCy1m"
   },
   "outputs": [],
   "source": [
    "#the original works\n",
    "from keras.layers import BatchNormalization, LeakyReLU, GlobalMaxPooling1D\n",
    "from keras.models import Model\n",
    "from keras.layers import Input, Conv1D, MaxPooling1D, Dropout, Flatten, Dense\n",
    "from keras.regularizers import l2\n",
    "\n",
    "def create_model(input_shape, weight=1e-3):\n",
    "    inputs = Input(shape=input_shape)\n",
    "    leaky_relu = LeakyReLU()\n",
    "\n",
    "    # Conv1\n",
    "    x = Conv1D(64, kernel_size=5, strides=2, padding=\"valid\", kernel_initializer=\"he_normal\",\n",
    "               kernel_regularizer=l2(weight), bias_regularizer=l2(weight))(inputs)\n",
    "    x = BatchNormalization()(x)\n",
    "    x = leaky_relu(x)\n",
    "    x = MaxPooling1D(pool_size=3)(x)\n",
    "    x = Dropout(0.5)(x)  # Avoid overfitting\n",
    "    # Conv2\n",
    "    x = Conv1D(96, kernel_size=5, strides=2, padding=\"valid\", kernel_initializer=\"he_normal\",\n",
    "               kernel_regularizer=l2(weight), bias_regularizer=l2(weight))(x)\n",
    "    x = BatchNormalization()(x)\n",
    "    x = leaky_relu(x)\n",
    "    x = MaxPooling1D(pool_size=3)(x)\n",
    "    x = Dropout(0.5)(x)  # Avoid overfitting\n",
    "\n",
    "    # Conv3\n",
    "    x = Conv1D(128, kernel_size=5, strides=2, padding=\"valid\", kernel_initializer=\"he_normal\",\n",
    "               kernel_regularizer=l2(weight), bias_regularizer=l2(weight))(x)\n",
    "    x = BatchNormalization()(x)\n",
    "    x = leaky_relu(x)\n",
    "    x = GlobalMaxPooling1D()(x)\n",
    "    x = Dropout(0.5)(x)  # Avoid overfitting\n",
    "\n",
    "    # FC layers\n",
    "    x = Dense(128, activation=\"relu\")(x)\n",
    "    x = Dropout(0.5)(x)\n",
    "    x = Dense(64, activation=\"relu\")(x)\n",
    "    x = Dropout(0.5)(x)\n",
    "    outputs = Dense(2, activation=\"softmax\")(x)\n",
    "\n",
    "    model = Model(inputs=inputs, outputs=outputs)\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6c830eb3",
   "metadata": {
    "executionInfo": {
     "elapsed": 2,
     "status": "ok",
     "timestamp": 1693291350253,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "6c830eb3"
   },
   "outputs": [],
   "source": [
    "def lr_schedule(epoch, lr):\n",
    "    if epoch > 70 and \\\n",
    "            (epoch - 1) % 10 == 0:\n",
    "        lr *= 0.1\n",
    "    print(\"Learning rate: \", lr)\n",
    "    return lr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ded3f606",
   "metadata": {
    "executionInfo": {
     "elapsed": 2,
     "status": "ok",
     "timestamp": 1693291351337,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "ded3f606"
   },
   "outputs": [],
   "source": [
    "def plot(history):\n",
    "    \"\"\"Plot performance curve\"\"\"\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n",
    "    axes[0].plot(history[\"loss\"], \"r-\", history[\"val_loss\"], \"b-\", linewidth=0.5)\n",
    "    axes[0].set_title(\"Loss\")\n",
    "    axes[1].plot(history[\"accuracy\"], \"r-\", history[\"val_accuracy\"], \"b-\", linewidth=0.5)\n",
    "    axes[1].set_title(\"Accuracy\")\n",
    "    fig.tight_layout()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "v6XhEYeu7cgM",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 44620,
     "status": "ok",
     "timestamp": 1693291590538,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "v6XhEYeu7cgM",
    "outputId": "430113f3-4827-43a7-b1a7-11d636f3b212"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "train num: 11694\n",
      "test num: 16947\n"
     ]
    }
   ],
   "source": [
    "from keras import optimizers\n",
    "if __name__ == \"__main__\":\n",
    "\n",
    "    x_train,y_train, groups_train,x_val, y_val, groups_val, x_test, y_test, groups_test= load_data()\n",
    "\n",
    "    y_train = keras.utils.to_categorical(y_train, num_classes=2) # Convert to two categories\n",
    "    y_val = keras.utils.to_categorical(y_val, num_classes=2)\n",
    "    y_test = keras.utils.to_categorical(y_test, num_classes=2)\n",
    "\n",
    "    print(\"train num:\", len(y_train))\n",
    "    print(\"test num:\", len(y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e7ad2c78",
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.utils import to_categorical\n",
    "from sklearn.metrics import confusion_matrix, f1_score,roc_auc_score\n",
    "\n",
    "\n",
    "model = create_model(input_shape=x_train.shape[1:])\n",
    "model.summary()\n",
    "\n",
    "model.compile(optimizer='adam', loss=\"categorical_crossentropy\", metrics=['accuracy'])\n",
    "\n",
    "lr_scheduler = LearningRateScheduler(lr_schedule) # Dynamic adjustment learning rate\n",
    "history = model.fit(x_train, y_train, batch_size=128, epochs=100, validation_data=(x_val, y_val),\n",
    "                        callbacks=[lr_scheduler])\n",
    "\n",
    "# evaluate the model\n",
    "loss, accuracy = model.evaluate(x_test, y_test)\n",
    "print(\"Test loss: \", loss)\n",
    "print(\"Accuracy: \", accuracy)\n",
    "\n",
    "print(\"Testing:\")\n",
    "y_true, y_pred = y_test.argmax(axis=-1), np.argmax(model.predict(x_test, batch_size=1024, verbose=1), axis=-1)\n",
    "y_true = to_categorical(y_true, num_classes=2)  # Convert y_true to binary label indicators\n",
    "C = confusion_matrix(y_true[:, 1], y_pred, labels=(1, 0))  # Use y_true[:, 1] as binary label indicators for class 1\n",
    "TP, TN, FP, FN = C[0, 0], C[1, 1], C[1, 0], C[0, 1]\n",
    "acc, sn, sp = 1. * (TP + TN) / (TP + TN + FP + FN), 1. * TP / (TP + FN), 1. * TN / (TN + FP)\n",
    "precision = TP / (TP + FP)\n",
    "recall = sn  # Recall is equivalent to sensitivity\n",
    "f1 = 2 * (precision * recall) / (precision + recall)\n",
    "print(\"TP:{}, TN:{}, FP:{}, FN:{}, loss{}, acc{}, sn{}, sp{}, f1{}\".format(TP, TN, FP, FN,loss, acc, sn, sp, f1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "4a01665c",
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "executionInfo": {
     "elapsed": 1945,
     "status": "ok",
     "timestamp": 1693292509472,
     "user": {
      "displayName": "Hiếu Nguyễn Xuân",
      "userId": "09184859202144170734"
     },
     "user_tz": -420
    },
    "id": "4a01665c",
    "outputId": "a0263c7d-97a0-4ad8-a82a-3d366da42427"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "530/530 [==============================] - 1s 2ms/step\n"
     ]
    }
   ],
   "source": [
    "#Save file\n",
    "y_score = model.predict(x_test)\n",
    "output = pd.DataFrame({\"y_true\": y_test[:, 1], \"y_score\": y_score[:, 1], \"subject\": groups_test})\n",
    "output.to_csv(os.path.join(\"output\", \"file_name.csv\"), index=False)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "T4",
   "machine_shape": "hm",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
